import numpy as np
import pandas as pd
import scanpy as sc
sc.settings.verbosity = 3
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor='white')
results_file = 'results_HL.h5ad'
adata_r = sc.read_10x_mtx('/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/filtered_feature_bc_matrix/', var_names='gene_symbols',cache=False)
adata_r.var_names_make_unique()
adata_r
scanpy==1.6.0 anndata==0.7.4 umap==0.4.6 numpy==1.19.2 scipy==1.5.3 pandas==1.1.3 scikit-learn==0.23.2 statsmodels==0.12.0 python-igraph==0.8.3 leidenalg==0.8.2 --> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.
AnnData object with n_obs × n_vars = 3394 × 36601
var: 'gene_ids', 'feature_types'
sc.pp.filter_cells(adata_r, min_genes=200)
sc.pp.filter_genes(adata_r, min_cells=3)
sc.pp.filter_cells(adata_r, max_counts=39766)
sc.pp.filter_cells(adata_r, max_genes=5942)
adata_r.var['mt'] = adata_r.var_names.str.startswith('MT-') # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata_r, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
adata = adata_r[adata_r.obs.pct_counts_mt < 10, :]
filtered out 413 cells that have less than 200 genes expressed filtered out 16445 genes that are detected in less than 3 cells filtered out 56 cells that have more than 39766 counts filtered out 10 cells that have more than 5942 genes expressed
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.pl.highly_variable_genes(adata)
adata.raw = adata
normalizing counts per cell
finished (0:00:00)
extracting highly variable genes
finished (0:00:00)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
sc.pp.regress_out(adata, 'pct_counts_mt')
sc.pp.scale(adata, max_value=10)
regressing out pct_counts_mt
sparse input is densified and may lead to high memory use
... storing 'feature_types' as categorical
finished (0:01:08)
cell_cycle_genes = [x.strip() for x in open('cell_cycle.txt')]
print(len(cell_cycle_genes))
# Split into 2 lists
s_genes = cell_cycle_genes[:43]
g2m_genes = cell_cycle_genes[43:]
cell_cycle_genes = [x for x in cell_cycle_genes if x in adata.var_names]
print(len(cell_cycle_genes))
97 94
sc.tl.score_genes_cell_cycle(adata, s_genes=s_genes, g2m_genes=g2m_genes)
sc.pl.violin(adata, ['S_score', 'G2M_score'],
jitter=0.4)
calculating cell cycle phase
computing score 'S_score'
WARNING: genes are not in var_names and ignored: ['MLF1IP']
finished: added
'S_score', score of gene set (adata.obs).
645 total control genes are used. (0:00:00)
computing score 'G2M_score'
WARNING: genes are not in var_names and ignored: ['FAM64A', 'HN1']
finished: added
'G2M_score', score of gene set (adata.obs).
769 total control genes are used. (0:00:00)
--> 'phase', cell cycle phase (adata.obs)
... storing 'phase' as categorical
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution=0.5)
sc.pl.umap(adata, color=['leiden'])
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:00)
computing neighbors
using 'X_pca' with n_pcs = 40
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:04)
running Leiden clustering
finished: found 11 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
sc.pl.umap(adata, color="phase")
adata_r
AnnData object with n_obs × n_vars = 2915 × 20156
obs: 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
# sc.pp.regress_out(adata, 'phase')
# sc.pp.scale(adata, max_value=10)
tfh = []
th1 = []
treg = []
memory = []
cd4 = []
cd8= []
with open("/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/CD8_ARS.txt",'r') as myfile:
for line in myfile:
line=line.strip('\n')
cd8.append(line)
with open("/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/CD4_Ars.txt",'r') as myfile:
for line in myfile:
line=line.strip('\n')
cd4.append(line)
with open("/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/TH1_ARS.txt",'r') as myfile:
for line in myfile:
line=line.strip('\n')
th1.append(line)
with open("/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/tfh_Ars.txt",'r') as myfile:
for line in myfile:
line=line.strip('\n')
tfh.append(line)
with open("/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/Treg_ARS.txt",'r') as myfile:
for line in myfile:
line=line.strip('\n')
treg.append(line)
with open("/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/Memory_ARS.txt",'r') as myfile:
for line in myfile:
line=line.strip('\n')
memory.append(line)
print("TH1: \n", len(th1))
th1 = [x for x in th1 if x in adata.var_names]
print(len(th1))
print("TFH: \n", len(tfh))
tfh = [x for x in tfh if x in adata.var_names]
print(len(tfh))
print("CD4: \n", len(cd4))
cd4 = [x for x in cd4 if x in adata.var_names]
print(len(cd4))
print("CD8: \n", len(cd8))
cd8 = [x for x in cd8 if x in adata.var_names]
print(len(cd8))
print("TREG: \n", len(treg))
treg = [x for x in treg if x in adata.var_names]
print(len(treg))
print("Memory: \n", len(memory))
memory = [x for x in memory if x in adata.var_names]
print(len(memory))
TH1: 138 138 TFH: 72 61 CD4: 9 9 CD8: 146 129 TREG: 27 23 Memory: 33 29
print(treg)
['FOXP3', 'IKZF2', 'TNFRSF4', 'CAPG', 'TNFRSF18', 'CD74', 'CTLA4', 'RGS1', 'CCND2', 'SELL', 'SERINC3', 'SAMSN1', 'IFNGR1', 'GIMAP7', 'LTB', 'BTG1', 'IL7R', 'SDF4', 'CD2', 'SHISA5', 'GPX4', 'MBNL1', 'PELI1']
print(memory)
['RPS27', 'RPS23', 'RPS24', 'RPS14', 'RPS19', 'RPS29', 'RPS16', 'IL7R', 'RPL18A', 'RPS5', 'RPL37A', 'BTG1', 'RPS28', 'RPL36A', 'RPL13', 'RPLP2', 'RPS15A', 'RPL12', 'RPL36', 'RPL9', 'RPS9', 'RPLP1', 'RPS15', 'RPS21', 'RPL38', 'CCND2', 'MALAT1', 'RABAC1', 'EIF4A2']
print(tfh)
['PDCD1', 'RGS10', 'CXCR5', 'TOX', 'TOX2', 'PTRH1', 'ANGPTL2', 'MARCKSL1', 'SMCO4', 'TNFSF8', 'PPP1R14B', 'TNFAIP8', 'HIF1A', 'LPP', 'MAF', 'CD160', 'SH2D1A', 'CD200', 'TBC1D4', 'TPI1', 'RPSA', 'HMGB1', 'ICOS', 'GAPDH', 'PRKCA', 'PTPRCAP', 'ZAP70', 'PTPN11', 'DENND2D', 'ZFP36L1', 'FYN', 'GDI2', 'LIMD2', 'PKM', 'NT5E', 'CTSB', 'PFKL', 'PGAM1', 'MATK', 'TRIM8', 'COX14', 'ASAP1', 'GNG2', 'P2RX7', 'LRMP', 'CD3G', 'ALDOA', 'GNA13', 'ISG15', 'RAB37', 'MMD', 'FAM162A', 'BORCS8', 'DDIT4', 'PTP4A2', 'CD82', 'MIF', 'PFKP', 'GIMAP5', 'EEA1', 'BATF']
allgenes = adata.var_names
iglist = []
for i in range(1, len(allgenes)):
if allgenes[i].startswith("IG"):
print(allgenes[i])
iglist.append(allgenes[i])
sc.tl.score_genes(adata, iglist, score_name="IG")
list_BCells=['CD19', 'SYK', 'PIK3AP1', 'AKT1', 'AKT2', 'AKT3', 'CHUK', 'IKBKB', 'IKBKG']
sc.tl.score_genes(adata, iglist, score_name="bcell")
sc.pl.umap(adata, color=["IG", "bcell"])
sc.tl.score_genes(adata, th1, score_name="th1")
sc.tl.score_genes(adata, tfh, score_name="tfh")
sc.tl.score_genes(adata, treg, score_name="treg")
sc.tl.score_genes(adata, cd8, score_name="cd8")
sc.tl.score_genes(adata, cd4, score_name="cd4")
sc.tl.score_genes(adata, memory, score_name="memory")
sc.pl.umap(adata, color=["cd4", "cd8"])
sc.pl.umap(adata, color=["tfh", "th1", "treg", "memory"])
IGSF3
IGSF9
IGSF8
IGKC
IGKV4-1
IGKV3-15
IGKV3-20
IGFBP2
IGFBP5
IGSF10
IGF2BP2
IGFBP7
IGIP
IGF2R
IGF2BP3
IGFBP3
IGHEP2
IGF2
IGSF22
IGHMBP2
IGSF9B
IGFBP6
IGF1
IGHA2
IGHE
IGHG4
IGHG2
IGHGP
IGHA1
IGHEP1
IGHG1
IGHG3
IGHD
IGHM
IGHV1-2
IGHV1-3
IGHV3-7
IGHV3-23
IGHV1-24
IGHV3-30
IGHV1-69D
IGHV3-74
IGHV5-78
IGDCC3
IGDCC4
IGF1R
IGSF6
IGFBP4
IGF2BP1
IGFLR1
IGFL2
IGFL2-AS1
IGLON5
IGLV1-51
IGLV3-21
IGLV2-8
IGLC1
IGLC2
IGLC3
IGLC5
IGLC7
IGLL1
IGBP1
IGBP1-AS1
computing score 'IG'
finished: added
'IG', score of gene set (adata.obs).
1146 total control genes are used. (0:00:00)
computing score 'bcell'
finished: added
'bcell', score of gene set (adata.obs).
1146 total control genes are used. (0:00:00)
computing score 'th1'
finished: added
'th1', score of gene set (adata.obs).
841 total control genes are used. (0:00:00)
computing score 'tfh'
finished: added
'tfh', score of gene set (adata.obs).
649 total control genes are used. (0:00:00)
computing score 'treg'
finished: added
'treg', score of gene set (adata.obs).
249 total control genes are used. (0:00:00)
computing score 'cd8'
finished: added
'cd8', score of gene set (adata.obs).
938 total control genes are used. (0:00:00)
computing score 'cd4'
finished: added
'cd4', score of gene set (adata.obs).
149 total control genes are used. (0:00:00)
computing score 'memory'
finished: added
'memory', score of gene set (adata.obs).
97 total control genes are used. (0:00:00)
sc.pl.umap(adata, color="CD4")
sc.pl.umap(adata, color="CD8A", size=4)
print(cd8)
['GZMA', 'CCL5', 'GZMB', 'KLRD1', 'NKG7', 'CD8A', 'KLRK1', 'LGALS1', 'KLRC1', 'KLRG1', 'LGALS3', 'GZMK', 'CX3CR1', 'PLAC8', 'ZEB2', 'CTSD', 'H2AFZ', 'EPSTI1', 'DNAJC15', 'CD48', 'CTSW', 'SELPLG', 'TMSB4X', 'PLEK', 'TM6SF1', 'S100A10', 'VIM', 'ZYX', 'ABRACL', 'CRIP1', 'RACGAP1', 'CYBA', 'CCR2', 'PYCARD', 'SUB1', 'RAP1B', 'GLIPR2', 'EMP3', 'KRTCAP2', 'ID2', 'RPA2', 'STMN1', 'S100A6', 'AHNAK', 'DAPK2', 'OSTF1', 'ITGAX', 'ANXA6', 'CD47', 'ARL4C', 'RUNX3', 'IFNGR1', 'LFNG', 'RASGRP2', 'REEP5', 'CHSY1', 'IL18RAP', 'JAK1', 'SGK1', 'KLRC2', 'TBX21', 'SEMA4A', 'SLAMF7', 'TXNDC5', 'ST3GAL6', 'ACTG1', 'CCNB2', 'CTSC', 'BIRC5', 'ITGB2', 'SP100', 'LAMB3', 'UBE2C', 'ANXA2', 'CENPW', 'CALM1', 'COX5B', 'COX5A', 'SNX10', 'EFHD2', 'LSP1', 'SERPINB9', 'HMGB2', 'THY1', 'RBM3', 'SNX5', 'PPP3CA', 'ARL6IP5', 'CKS1B', 'SEC61B', 'GNA15', 'NPM3', 'UBE2G2', 'ACTB', 'UGCG', 'S1PR4', 'CLIC1', 'LMNB1', 'CKS2', 'TPM4', 'TAGLN2', 'NDUFB7', 'PTP4A3', 'H2AFY', 'HMGN2', 'YWHAQ', 'GGH', 'DNAJC9', 'CMPK1', 'GIMAP7', 'CCND3', 'HCST', 'SMC2', 'ANAPC5', 'DEK', 'LSM5', 'CMTM7', 'PCNA', 'H2AFV', 'RAN', 'ANP32E', 'CENPA', 'SLBP', 'DUT', 'TMPO', 'H2AFX', 'TUBB4B', 'UBE2S', 'TUBA1B']
naive = ["CD8A", "CD4"]
sc.tl.score_genes(adata, naive, score_name="naive")
sc.pl.umap(adata, color="naive")
computing score 'naive'
finished: added
'naive', score of gene set (adata.obs).
100 total control genes are used. (0:00:00)
list_NKCells=["NCAM1","GZMK", "GNLY", "GZMA"]
sc.tl.score_genes(adata, list_NKCells, score_name="nk")
sc.pl.umap(adata, color=["leiden", "nk"], ncols=1)
computing score 'nk'
finished: added
'nk', score of gene set (adata.obs).
150 total control genes are used. (0:00:00)
sc.pl.umap(adata, color="CD8A", size=25, color_map="magma")
sc.pl.umap(adata, color="NCAM1", size=25, color_map="magma" )
newmacs= ["ITGAM", "CD68", "CD163"]
sc.tl.score_genes(adata, newmacs, score_name="n macs")
sc.pl.umap(adata, color="n macs", size=25, color_map="magma")
computing score 'n macs'
finished: added
'n macs', score of gene set (adata.obs).
150 total control genes are used. (0:00:00)
sc.pl.umap(adata, color="CD68", size=25, color_map="magma")
list_NaiveT=["CCR7", "IL7R", "LEF1"]
sc.tl.score_genes(adata, list_NaiveT, score_name="naiveT")
sc.pl.umap(adata, color="naiveT", size=25, color_map="magma")
computing score 'naiveT'
finished: added
'naiveT', score of gene set (adata.obs).
150 total control genes are used. (0:00:00)
list_ActivatedT=["CD25", "CD69"]
sc.tl.score_genes(adata, list_ActivatedT, score_name="activeT")
sc.pl.umap(adata, color="activeT", size=25, color_map="magma")
computing score 'activeT'
WARNING: genes are not in var_names and ignored: ['CD25']
finished: added
'activeT', score of gene set (adata.obs).
50 total control genes are used. (0:00:00)
list_TCells=["CD8", "CD3E", "CD4"]
sc.tl.score_genes(adata, list_TCells, score_name="allT")
sc.pl.umap(adata, color="allT", size=25, color_map="magma")
computing score 'allT'
WARNING: genes are not in var_names and ignored: ['CD8']
finished: added
'allT', score of gene set (adata.obs).
100 total control genes are used. (0:00:00)
['CXCR6', 'CCR2', 'ID2', 'NKG7', 'LGALS3', 'PLAC8', 'GZMB', 'LGALS1', 'DNAJC15', 'S100A10', 'GNA15', 'EPSTI1', 'TMSB4X', 'CRIP1', 'ANXA1', 'S100A4', 'CCL5', 'GLIPR2', 'CTSW', 'SUB1', 'LSP1', 'SELPLG', 'ANXA6', 'AHNAK', 'S100A6', 'BHLHE40', 'CD48', 'PLEK', 'IFNGR1', 'VIM', 'ABRACL', 'CYBA', 'CD47', 'SH3BGRL3', 'ZYX', 'S100A13', 'PYCARD', 'CTSD', 'TAGLN2', 'IL18R1', 'ACTG1', 'ST3GAL6', 'OSTF1', 'ADGRE5', 'HMGB2', 'KLRC1', 'EMP3', 'CDC42', 'RORA', 'ITGB7', 'TPST2', 'TBX21', 'S1PR4', 'CALM1', 'MYL6', 'AGPAT4', 'IL18RAP', 'RAP1B', 'PPP3CA', 'CD52', 'RBM3', 'LFNG', 'RASGRP2', 'GLRX', 'KRTCAP2', 'CORO1A', 'RNF138', 'ITGA4', 'PODNL1', 'ARL6IP5', 'ARHGDIB', 'H2AFZ', 'S100A11', 'TXNDC5', 'CLIC1', 'CFL1', 'RUNX3', 'GABARAPL2', 'CROT', 'REEP5', 'ITGB2', 'SERPINB9', 'MRPL33', 'HSP90B1', 'MYL12A', 'GMFG', 'COX5A', 'THY1', 'ARPC5', 'MYO1F', 'PPIB', 'CTSC', 'ATP1A1', 'STMN1', 'COX17', 'CYTH4', 'ECH1', 'PPP1CA', 'PSMB3', 'SLAMF1', 'COX5B', 'ACTB', 'CAPZB', 'GAPDH', 'RPA2', 'NDUFB7', 'PRELID1', 'IL2RB', 'H2AFY', 'ACOT7', 'ANXA2', 'SPN', 'ENO1', 'CD2', 'GGH', 'NPTN', 'RNF166', 'TSPO', 'SEC61B', 'APRT', 'MYO1G', 'SEC61G', 'UBE2G2', 'ACTR3', 'MTPN', 'ESYT1', 'DOK2', 'SMS', 'CLTA', 'CDC42EP3', 'FAM107B', 'TPM4', 'TMED2', 'GIMAP7', 'IDH3A', 'AP2S1', 'TALDO1', 'ANXA5']
list_BCells=['CD19', 'SYK', 'PIK3AP1', 'AKT1', 'AKT2', 'AKT3', 'CHUK', 'IKBKB', 'IKBKG', 'IGSF9', 'IGSF8', 'IGKC', 'IGKV3-15', 'IGHGP', 'IGFLR1', 'IGLV3-21', 'IGBP1-AS1']
print("IG: \n", len(iglist))
treg = [x for x in iglist if x in adata.uns.hvg]
print(len(iglist))
extracting highly variable genes
finished (0:00:00)
hvgs = adata.var_vector(k="highly_variable")
allgenes = adata.var_names
thelists = []
for d in range (0, len(allgenes)):
if (hvgs[d]==True):
thelists.append(allgenes[d])
print(len(thelists))
cov=0
File "<ipython-input-114-6fe652b3466a>", line 11 if iglist[d] is in thelists: ^ SyntaxError: invalid syntax
iglistv2=[]
for d in range (0,len(iglist)):
if iglist[d] in thelists:
iglistv2.append(iglist[d])
print(iglistv2)
['IGSF9', 'IGSF8', 'IGKC', 'IGKV3-15', 'IGHGP', 'IGFLR1', 'IGLV3-21', 'IGBP1-AS1']
cd8v2 = []
for i in range (0, len(cd8)):
if (cd8[i] not in cd4):
cd8v2.append(cd8[i])
print(cd8v2)
print(len(cd8))
print(len(cd8v2))
['GZMA', 'CCL5', 'GZMB', 'KLRD1', 'NKG7', 'CD8A', 'KLRK1', 'LGALS1', 'KLRC1', 'KLRG1', 'LGALS3', 'GZMK', 'CX3CR1', 'PLAC8', 'ZEB2', 'CTSD', 'H2AFZ', 'EPSTI1', 'DNAJC15', 'CD48', 'CTSW', 'SELPLG', 'TMSB4X', 'PLEK', 'TM6SF1', 'S100A10', 'VIM', 'ZYX', 'ABRACL', 'CRIP1', 'RACGAP1', 'CYBA', 'CCR2', 'PYCARD', 'SUB1', 'RAP1B', 'GLIPR2', 'EMP3', 'KRTCAP2', 'ID2', 'RPA2', 'STMN1', 'AHNAK', 'DAPK2', 'OSTF1', 'ITGAX', 'ANXA6', 'CD47', 'ARL4C', 'RUNX3', 'IFNGR1', 'LFNG', 'RASGRP2', 'REEP5', 'CHSY1', 'IL18RAP', 'JAK1', 'SGK1', 'KLRC2', 'TBX21', 'SEMA4A', 'SLAMF7', 'TXNDC5', 'ST3GAL6', 'CCNB2', 'CTSC', 'BIRC5', 'ITGB2', 'SP100', 'LAMB3', 'UBE2C', 'ANXA2', 'CENPW', 'CALM1', 'COX5B', 'COX5A', 'SNX10', 'EFHD2', 'LSP1', 'SERPINB9', 'HMGB2', 'THY1', 'RBM3', 'SNX5', 'PPP3CA', 'ARL6IP5', 'CKS1B', 'SEC61B', 'GNA15', 'NPM3', 'UBE2G2', 'ACTB', 'UGCG', 'S1PR4', 'CLIC1', 'LMNB1', 'CKS2', 'TPM4', 'TAGLN2', 'NDUFB7', 'PTP4A3', 'H2AFY', 'HMGN2', 'YWHAQ', 'GGH', 'DNAJC9', 'CMPK1', 'GIMAP7', 'CCND3', 'HCST', 'SMC2', 'ANAPC5', 'DEK', 'LSM5', 'CMTM7', 'PCNA', 'H2AFV', 'RAN', 'ANP32E', 'CENPA', 'SLBP', 'DUT', 'TMPO', 'H2AFX', 'TUBB4B', 'UBE2S', 'TUBA1B'] 129 127
cd4v2 = []
for i in range (0, len(cd4)):
if (cd4[i] not in cd8):
cd4v2.append(cd4[i])
print(len(cd4))
print(len(cd4v2))
9 7
nks = ["GNLY", "NKG7", "KLRB1"]
sc.tl.score_genes(adata, nks, score_name="nks")
sc.pl.umap(adata, color="nks")
computing score 'nks'
finished: added
'nks', score of gene set (adata.obs).
150 total control genes are used. (0:00:00)
list_BIG=['CD19', 'SYK', 'PIK3AP1', 'AKT1', 'AKT2', 'AKT3', 'CHUK', 'IKBKB', 'IKBKG', 'IGSF9', 'IGSF8', 'IGKC', 'IGKV3-15', 'IGHGP', 'IGFLR1', 'IGLV3-21', 'IGBP1-AS1']
sc.tl.leiden(adata, resolution=0.3)
sc.tl.rank_genes_groups(adata, "leiden", method="wilcoxon")
sc.tl.marker_gene_overlap(adata, allmarkers)
running Leiden clustering
finished: found 11 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
ranking genes
finished: added to `.uns['rank_genes_groups']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:05)
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Tfh | 61.0 | 61.0 | 61.0 | 61.0 | 61.0 | 61.0 | 61.0 | 61.0 | 61.0 | 61.0 | 61.0 |
| Th1 | 138.0 | 138.0 | 138.0 | 138.0 | 138.0 | 138.0 | 138.0 | 138.0 | 138.0 | 138.0 | 138.0 |
| NK | 3.0 | 3.0 | 3.0 | 3.0 | 3.0 | 3.0 | 3.0 | 3.0 | 3.0 | 3.0 | 3.0 |
| B | 17.0 | 17.0 | 17.0 | 17.0 | 17.0 | 17.0 | 17.0 | 17.0 | 17.0 | 17.0 | 17.0 |
| CD4 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 |
| CD8 | 127.0 | 127.0 | 127.0 | 127.0 | 127.0 | 127.0 | 127.0 | 127.0 | 127.0 | 127.0 | 127.0 |
| 4 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
| 8 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
allmarkers = {"Tfh": tfh, "Th1": th1, "NK": list_NKCells, "B": list_BIG, "CD4": cd4v2, "CD8": cd8v2, "4": ["CD4"], "8": ["CD8A"]}
sc.tl.marker_gene_overlap(adata, allmarkers, top_n_markers=50, normalize="reference")
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Tfh | 0.081967 | 0.000000 | 0.0 | 0.000000 | 0.049180 | 0.016393 | 0.000000 | 0.000000 | 0.065574 | 0.016393 | 0.016393 | 0.032787 | 0.016393 | 0.016393 |
| Th1 | 0.007246 | 0.007246 | 0.0 | 0.050725 | 0.050725 | 0.036232 | 0.000000 | 0.050725 | 0.094203 | 0.021739 | 0.072464 | 0.050725 | 0.021739 | 0.057971 |
| NK | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| B | 0.000000 | 0.117647 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.117647 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| CD4 | 0.000000 | 0.142857 | 0.0 | 0.142857 | 0.142857 | 0.000000 | 0.000000 | 0.000000 | 0.142857 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.142857 |
| CD8 | 0.000000 | 0.000000 | 0.0 | 0.015748 | 0.015748 | 0.039370 | 0.000000 | 0.094488 | 0.047244 | 0.110236 | 0.062992 | 0.031496 | 0.031496 | 0.031496 |
| 4 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 8 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
sc.pl.umap(adata, color="leiden")
sc.tl.score_genes(adata, list_BIG, score_name="Bv2")
sc.pl.umap(adata, color="Bv2")
computing score 'Bv2'
finished: added
'Bv2', score of gene set (adata.obs).
499 total control genes are used. (0:00:00)
pd.DataFrame(adata.uns['rank_genes_groups']['names']).head(10)
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | CYTOR | MS4A1 | RPS12 | IL7R | CXCL13 | FCER1G | TNFRSF13C | CCL5 | GAPDH | MKI67 | CST3 | RPS7 | GZMB | AL157895.1 |
| 1 | HLA-A | IGHM | RPL32 | TPT1 | AC004585.1 | FTL | BANK1 | NKG7 | ACTB | STMN1 | IFITM3 | CCL17 | SERPINF1 | TPSAB1 |
| 2 | IL32 | CD79A | RPL13 | PGM2L1 | SESN1 | PSAP | CD79A | CTSW | PFN1 | HMGB2 | VIM | TMSB4X | IRF7 | TPSB2 |
| 3 | CTLA4 | CD37 | RPS3A | GPR171 | RBPJ | TIMP1 | CD83 | GZMK | IL32 | HMGN2 | CD63 | LGALS1 | IRF4 | HPGDS |
| 4 | DUSP4 | IGHD | RPL10 | KLRB1 | LINC01871 | TYROBP | MS4A1 | KLRK1 | PKM | HMGB1 | HLA-DRA | RACK1 | GPR183 | CMA1 |
| 5 | B2M | CD83 | RPS3 | RNF19A | KIAA0319L | LYZ | BASP1 | CST7 | SUB1 | TUBB | FKBP1A | GSTP1 | CLIC3 | CPA3 |
| 6 | TRAC | CD74 | RPL34 | NR3C1 | ID2 | AIF1 | CD37 | HCST | CFL1 | TUBA1B | ANXA2 | TXN | EGLN3 | KIT |
| 7 | HLA-B | FCER2 | RPS6 | RPS15A | ARMH1 | IFI30 | HLA-DQA1 | GZMA | RANBP1 | ASPM | HLA-DPB1 | RPL17 | JCHAIN | GATA2 |
| 8 | MIR4435-2HG | HLA-DQA1 | RPL3 | SESN1 | PGM2L1 | PLAUR | NFKBID | AC020916.1 | ENO1 | TPX2 | HLA-DPA1 | NPM1 | LILRB4 | HDC |
| 9 | ARID5B | HLA-DRA | RPL21 | ANXA1 | RNASET2 | C5AR1 | CD74 | FOSB | SUMO2 | CENPF | GSN | BCL2A1 | TCF4 | IL1RL1 |
sc.pl.umap(adata, color="leiden", size=25)
adata
AnnData object with n_obs × n_vars = 2359 × 20156
obs: 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase', 'leiden', 'IG', 'bcell', 'th1', 'tfh', 'treg', 'cd8', 'cd4', 'memory', 'naive', 'nk', 'n macs', 'naiveT', 'activeT', 'allT', 'nks', 'Bv2', 'HRS', 'myscore'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'phase_colors', 'rank_genes_groups'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
sc.pl.umap(adata, color=["n macs", "allT"])
sc.pl.umap(adata, color="phase")
sc.pl.umap(adata, color="total_counts")
sc.pl.umap(adata, color=["CD4","CD163","n_genes"], color_map="magma")
list_HRS= ["TNFRSF8", "CD274", "IRF4", "PAX5", "FUT4"]
sc.tl.score_genes(adata, list_HRS, score_name="HRS")
sc.pl.umap(adata, color="HRS", color_map="magma")
computing score 'HRS'
finished: added
'HRS', score of gene set (adata.obs).
200 total control genes are used. (0:00:00)
sc.pl.umap(adata, color=["CD3E", "allT"], color_map="magma")
sc.pl.umap(adata, color=["IL2RA", "CD69"], color_map="magma")
sc.pl.umap(adata, color=["CCR7", "IL7R", "LEF1"], color_map="magma")
sc.pl.umap(adata, color=["IL2RA", "FOXP3", "CTLA4", "LAG3", "TNFRSF18", "IKZF2", "IKZF4", "LGMN"], color_map="magma")
adata
AnnData object with n_obs × n_vars = 2359 × 20156
obs: 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase', 'leiden', 'IG', 'bcell', 'th1', 'tfh', 'treg', 'cd8', 'cd4', 'memory', 'naive', 'nk', 'n macs', 'naiveT', 'activeT', 'allT', 'nks', 'Bv2', 'HRS', 'myscore'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'phase_colors', 'rank_genes_groups'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
list_Macrophages=["CD68", "ITGAM", "MPEG1", "CD163", "IRF7", "IFI44"]
macrophagedict = {"Macrophage": list_Macrophages}
sc.pl.matrixplot(adata, macrophagedict, groupby='leiden')
sc.pl.umap(adata, color=["IL9R", "CD33", "IL5RA"], color_map="magma")
sc.pl.violin(adata, keys=["IL9R", "CD33", "IL5RA"], groupby="leiden")
sc.pl.violin(adata, keys=["CD4", "CD8A"], groupby="leiden")
adata
AnnData object with n_obs × n_vars = 2359 × 20156
obs: 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase', 'leiden', 'IG', 'bcell', 'th1', 'tfh', 'treg', 'cd8', 'cd4', 'memory', 'naive', 'nk', 'n macs', 'naiveT', 'activeT', 'allT', 'nks', 'Bv2', 'HRS', 'myscore'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'phase_colors', 'rank_genes_groups'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
sc.pl.umap(adata, color=["leiden", "CD8A", "CD4"], color_map="magma")
sc.pl.umap(adata, color=["leiden", "PDCD1", "CXCR5", "BCL6", "CD3E", "CD4"], color_map="magma")
sc.pl.umap(adata, color=["leiden", "IL2RA", "CD4", "CD8A"], color_map="magma")
sc.tl.score_genes(adata, ["IL2RA", "CD4", "CD8A"], score_name="myscore")
sc.pl.umap(adata, color="myscore", color_map="magma")
computing score 'myscore'
finished: added
'myscore', score of gene set (adata.obs).
100 total control genes are used. (0:00:00)
sc.pl.violin(adata, keys=["th1", "tfh", "treg", "memory", "cd4", "cd8"], groupby="leiden")
sc.pl.violin(adata, keys=["CD19", "CR2", "CD83", "NT5E"] , groupby="leiden")
sc.pl.umap(adata, color=["leiden", "CD19", "CR2", "CD83", "NT5E"], color_map="magma")
sc.pl.umap(adata, color="leiden", color_map="magma")
t = adata[adata.obs['leiden'].isin(["0", "1", "3", "5", "6", "8"])]
sc.pl.umap(t, color="leiden")
t2 = t
sc.pp.highly_variable_genes(t2, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.pl.highly_variable_genes(t2)
t2.raw = t2
sc.tl.pca(t, svd_solver='arpack')
sc.tl.leiden(t2, resolution=0.4)
sc.tl.rank_genes_groups(t2, 'leiden', method='wilcoxon')
sc.pl.rank_genes_groups(t2, n_genes=25, sharey=False)
pd.DataFrame(t2.uns['rank_genes_groups']['names']).head(10)
extracting highly variable genes
finished (0:00:00)
Trying to set attribute `.uns` of view, copying.
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:00)
running Leiden clustering
finished: found 6 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
ranking genes
finished: added to `.uns['rank_genes_groups']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:06)
pd.DataFrame(t2.uns['rank_genes_groups']['names']).head(10)
| 0 | 1 | 2 | 3 | 4 | 5 | |
|---|---|---|---|---|---|---|
| 0 | HLA-A | AC004585.1 | RPS12 | STMN1 | NKG7 | EHD2 |
| 1 | CYTOR | SESN1 | RPL10 | TUBA1B | GZMA | COX7A1 |
| 2 | MIR4435-2HG | PGM2L1 | RPS3 | TUBB | GZMB | CAV2 |
| 3 | CTLA4 | RNASET2 | RPL32 | HMGB1 | CST7 | SPARC |
| 4 | PMAIP1 | NR3C1 | RPL13 | H2AFZ | PRF1 | CNN3 |
| 5 | IL32 | CXCL13 | RPL21 | HMGN2 | CCL4 | CEBPD |
| 6 | HLA-B | TXNIP | RPL3 | HMGB2 | CCL5 | SNCG |
| 7 | ARID5B | VIM | RPS3A | CENPM | HCST | RBPMS |
| 8 | B2M | RBPJ | RPS27A | TYMS | HAVCR2 | NR2F2 |
| 9 | DUSP4 | LINC01871 | RPL34 | CKS1B | GZMK | SPARCL1 |
sc.pp.neighbors(t2, n_neighbors=10, n_pcs=40)
sc.tl.umap(t2)
sc.pl.umap(t2, color="CD8A", color_map="magma")
computing neighbors
using 'X_pca' with n_pcs = 40
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:03)
sc.pl.umap(t2, color="CD4", color_map="magma")
sc.pl.umap(t2, color="leiden", color_map="magma")
sc.pl.umap(t2, color="phase")
sc.tl.score_genes(t2, cd4v2, score_name="cd4T")
sc.tl.score_genes(t2, cd8v2, score_name="cd8T")
sc.tl.score_genes(t2, th1, score_name="th1T")
sc.tl.score_genes(t2, treg, score_name="tregT")
sc.tl.score_genes(t2, memory, score_name="memoryT")
sc.pl.umap(t2, color=["cd4T", "cd8T"], color_map="magma")
sc.pl.umap(t2, color=["th1T", "tregT", "memoryT"], color_map="magma")
computing score 'cd4T'
finished: added
'cd4T', score of gene set (adata.obs).
200 total control genes are used. (0:00:00)
computing score 'cd8T'
finished: added
'cd8T', score of gene set (adata.obs).
1043 total control genes are used. (0:00:00)
computing score 'th1T'
finished: added
'th1T', score of gene set (adata.obs).
1091 total control genes are used. (0:00:00)
computing score 'tregT'
finished: added
'tregT', score of gene set (adata.obs).
348 total control genes are used. (0:00:00)
computing score 'memoryT'
finished: added
'memoryT', score of gene set (adata.obs).
844 total control genes are used. (0:00:00)
list_Th1=["IFNG", "TBX21", "CD4"]
sc.tl.score_genes(t2, list_Th1, score_name="Th1v2")
sc.pl.umap(t2, color="Th1v2", color_map="magma")
computing score 'Th1v2'
finished: added
'Th1v2', score of gene set (adata.obs).
100 total control genes are used. (0:00:00)
sc.pl.umap(t2, color=["leiden", "CD4", "CD8A"], color_map="magma")
sc.pl.violin(t2, keys=["IFNG", "TBX21"], groupby="leiden")
sc.pl.violin(t2, keys=["S_score","G2M_score", "n_genes"], groupby="leiden")
t2
AnnData object with n_obs × n_vars = 1617 × 20156
obs: 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase', 'leiden', 'allT', 'cd4T', 'cd8T', 'th1T', 'tregT', 'memoryT', 'Th1v2'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'phase_colors', 'rank_genes_groups'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
t3 = t2[t2.obs['leiden'].isin(["0", "1", "2", "4", "5"])]
sc.pp.highly_variable_genes(t3, min_mean=0.0125, max_mean=3, min_disp=0.5)
t3.raw = t3
sc.tl.pca(t, svd_solver='arpack')
sc.tl.leiden(t3, resolution=0.3)
sc.tl.rank_genes_groups(t3, 'leiden', method='wilcoxon')
pd.DataFrame(t3.uns['rank_genes_groups']['names']).head(10)
running Leiden clustering
finished: found 3 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
ranking genes
finished: added to `.uns['rank_genes_groups']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:04)
| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | RPL34 | CTLA4 | CEBPD |
| 1 | RPS27 | SH2D1A | CD63 |
| 2 | RPL30 | B2M | NFIB |
| 3 | RPL13 | TRAC | GNG12 |
| 4 | RPS3A | HLA-A | COL4A2 |
| 5 | RPL39 | IL32 | TSPAN4 |
| 6 | RPL10 | ACTB | CCN1 |
| 7 | RPS3 | CD27 | COL4A1 |
| 8 | RPL32 | DUSP4 | NNMT |
| 9 | RPL11 | HLA-B | IGFBP7 |
sc.pl.violin(t3, keys=["CD4", "CD8A"], groupby="leiden")
sc.pl.violin(t3, keys=["GZMK", "GZMB", "GNLY"], groupby="leiden")
sc.pl.umap(t3, color="leiden")
sc.pl.violin(t3, keys=["IL2RA", "CD69"], groupby="leiden")
sc.pl.umap(t, color="leiden")
t4 = adata[adata.obs['leiden'].isin(["0", "1", "3", "5", "6", "8"])]
sc.pl.umap(t4, color=["leiden", "CD4", "CD8A"], color_map="magma")
sc.tl.score_genes(t4, ["CD8A"], score_name="CD8X")
t4
computing score 'CD8X'
finished: added
'CD8X', score of gene set (adata.obs).
50 total control genes are used. (0:00:00)
AnnData object with n_obs × n_vars = 1617 × 20156
obs: 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase', 'leiden', 'allT', 'CD8A', 'CD8X'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'phase_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
sc8 = t4.obs_vector(k='CD8X')
print(np.median(sc8))
print(np.std(sc8))
print(np.median(sc8)+ np.std(sc8))
-0.20651205 0.637557 0.431045
t4.obs.columns
Index(['n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts',
'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase',
'leiden', 'allT', 'CD8A'],
dtype='object')
cd8 = t4[t4.obs.CD8X > 0.431045, :]
cd8
sc.pl.umap(t4, color="leiden")
sc.pl.umap(cd8, color="leiden")
sc.tl.leiden(cd8, resolution=0.6)
sc.pl.umap(cd8, color=['leiden'])
sc.tl.rank_genes_groups(cd8, 'leiden', method='wilcoxon')
pd.DataFrame(cd8.uns['rank_genes_groups']['names']).head(5)
running Leiden clustering
finished: found 3 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
ranking genes
finished: added to `.uns['rank_genes_groups']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:00)
| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | RPS27 | CTLA4 | MT-ND4 |
| 1 | RPL21 | HERPUD1 | MT-CO2 |
| 2 | RPL17 | DUSP4 | MT-ND3 |
| 3 | RPS28 | TRAC | RPL3 |
| 4 | RPS23 | GAPDH | VIM |
sc.tl.score_genes(t4, ["CD4"], score_name="CD4X")
sc4 = t4.obs_vector(k='CD4X')
print(np.median(sc4))
print(np.std(sc4))
print(np.median(sc4)+ np.std(sc4))
computing score 'CD4X'
finished: added
'CD4X', score of gene set (adata.obs).
50 total control genes are used. (0:00:00)
-0.34476745
0.5185398
0.17377234
cd4 = t4[t4.obs.CD4X > 0.17377234, :]
cd4
sc.pl.umap(t4, color="leiden")
sc.pl.umap(cd4, color="leiden")
sc.pl.umap(cd4, color="CD8A", color_map="magma")
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-445-e730e9131adb> in <module> ----> 1 sc.pl.umap(cd4, color="CD8A", color_map="magma") ~/miniconda3/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py in umap(adata, **kwargs) 613 If `show==False` a :class:`~matplotlib.axes.Axes` or a list of it. 614 """ --> 615 return embedding(adata, 'umap', **kwargs) 616 617 ~/miniconda3/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py in embedding(adata, basis, color, gene_symbols, use_raw, sort_order, edges, edges_width, edges_color, neighbors_key, arrows, arrows_kwds, groups, components, layer, projection, img_key, crop_coord, alpha_img, bw, library_id, color_map, palette, size, frameon, legend_fontsize, legend_fontweight, legend_loc, legend_fontoutline, vmax, vmin, add_outline, outline_width, outline_color, ncols, hspace, wspace, title, show, save, ax, return_fig, **kwargs) 226 itertools.product(color, idx_components) 227 ): --> 228 color_vector, categorical = _get_color_values( 229 adata, 230 value_to_plot, ~/miniconda3/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py in _get_color_values(adata, value_to_plot, groups, palette, use_raw, gene_symbols, layer) 1037 values = adata.raw.obs_vector(value_to_plot) 1038 else: -> 1039 values = adata.obs_vector(value_to_plot, layer=layer) 1040 1041 ### ~/miniconda3/lib/python3.8/site-packages/anndata/_core/anndata.py in obs_vector(self, k, layer) 1364 ) 1365 layer = None -> 1366 return get_vector(self, k, "obs", "var", layer=layer) 1367 1368 def var_vector(self, k, *, layer: Optional[str] = None) -> np.ndarray: ~/miniconda3/lib/python3.8/site-packages/anndata/_core/index.py in get_vector(adata, k, coldim, idxdim, layer) 156 157 if (in_col + in_idx) == 2: --> 158 raise ValueError( 159 f"Key {k} could be found in both .{idxdim}_names and .{coldim}.columns" 160 ) ValueError: Key CD8A could be found in both .var_names and .obs.columns
t4
AnnData object with n_obs × n_vars = 1617 × 20156
obs: 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase', 'leiden', 'allT', 'CD8A', 'CD8X', 'CD4X'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'phase_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
t_cytotoxic=["GZMK", "GNLY", "GZMA"]
t_Tregs=["IL2RA", "FOXP3", "CTLA4", "LAG3", "TNFRSF18", "IKZF2", "IKZF4", "LGMN"]
t_Th1=["IFNG", "TBX21", "CD3E", "CD4"]
t_Th2=["GATA3", "CRTH2", "IL4", "IL13", "CD3E", "CD4"]
t_Th17=["CD161", "CCR4", "CD3E", "CD4"]
t_TFH=["PDCD1", "CXCR5", "BCL6", "CD3E", "CD4"]
sc.tl.score_genes(t4, t_Tregs, score_name="Tregs")
sc.tl.score_genes(t4, t_Th1, score_name="TH1")
sc.tl.score_genes(t4, t_Th17, score_name="TH17")
sc.tl.score_genes(t4, t_Th2, score_name="TH2")
sc.tl.score_genes(t4, t_TFH, score_name="TFH")
sc.tl.score_genes(t4, t_cytotoxic, score_name="cytotoxic")
sc.pl.violin(t4, keys=["Tregs","TH1", "TH2", "TH17", "cytotoxic", "TFH"], groupby="leiden")
computing score 'Tregs'
finished: added
'Tregs', score of gene set (adata.obs).
250 total control genes are used. (0:00:00)
computing score 'TH1'
finished: added
'TH1', score of gene set (adata.obs).
149 total control genes are used. (0:00:00)
computing score 'TH17'
WARNING: genes are not in var_names and ignored: ['CD161']
finished: added
'TH17', score of gene set (adata.obs).
100 total control genes are used. (0:00:00)
computing score 'TH2'
WARNING: genes are not in var_names and ignored: ['CRTH2']
finished: added
'TH2', score of gene set (adata.obs).
250 total control genes are used. (0:00:00)
computing score 'TFH'
finished: added
'TFH', score of gene set (adata.obs).
200 total control genes are used. (0:00:00)
computing score 'cytotoxic'
finished: added
'cytotoxic', score of gene set (adata.obs).
150 total control genes are used. (0:00:00)
sc.pl.violin(t4, keys=["Tregs","TH1", "TH2", "TH17", "cytotoxic", "TFH"], groupby="leiden")
sc.pl.umap(t4, color="leiden")
t_NaiveT=["CCR7", "IL7R", "LEF1"]
sc.tl.score_genes(t4, t_NaiveT, score_name="NaiveT")
sc.pl.violin(t4, keys="NaiveT", groupby="leiden")
computing score 'NaiveT'
finished: added
'NaiveT', score of gene set (adata.obs).
99 total control genes are used. (0:00:00)
sc.pl.violin(t4, keys="NaiveT", groupby="leiden")
sc.pl.violin(t4, keys="Tregs", groupby="leiden")
sc.pl.violin(t4, keys="IL2RA", groupby="leiden")
sc.pl.violin(t4, keys="cytotoxic", groupby="leiden")
sc.pl.umap(adata, color="leiden")
sc.pl.violin(t4, keys=["CD4"], groupby="leiden")
sc.pl.violin(t4, keys=["IL2RA", "FOXP3", "CTLA4", "LAG3", "TNFRSF18"], groupby="leiden")
sc.pl.umap(adata, color="leiden")
labelling = adata
new_cluster_names = [
'CD4 T', 'CD14 Monocytes',
'B', 'CD8 T',
'NK', 'FCGR3A Monocytes',
'Dendritic', 'Megakaryocytes']
adata.rename_categories('leiden', new_cluster_names)
t4.write("tcells.h5ad")
... storing 'feature_types' as categorical